RRPlot and the Colon data set

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

The data set

data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)

0 1 442 446

dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL

trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]


pander::pander(table(dataColonTrain$status))
0 1
308 313
pander::pander(table(dataColonTest$status))
0 1
134 133

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataColonTrain,NumberofRepeats = 10)

[++++++++++++++++++++++++++++++++++-++-+++++++++-]….

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy
extent 4.23e-01 1.223 1.526 1.905 0.564
node4 3.64e-01 1.133 1.439 1.828 0.593
age_node4 1.29e-03 1.000 1.001 1.002 0.593
rxLev_5FU_age -4.81e-03 0.992 0.995 0.998 0.557
rxLev_5FU -5.97e-02 0.902 0.942 0.984 0.557
nodes 3.81e-02 1.010 1.039 1.068 0.601
surg 2.35e-01 1.053 1.265 1.519 0.541
adhere 5.21e-07 1.000 1.000 1.000 0.528
age_nodes 1.44e-04 1.000 1.000 1.000 0.605
age_surg 2.03e-08 1.000 1.000 1.000 0.541
age_adhere 4.25e-09 1.000 1.000 1.000 0.528
age_extent 2.18e-09 1.000 1.000 1.000 0.528
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
extent 0.611 0.621 0.561 0.612 0.622
node4 0.641 0.621 0.594 0.641 0.622
age_node4 0.591 0.607 0.594 0.592 0.609
rxLev_5FU_age 0.609 0.621 0.556 0.609 0.622
rxLev_5FU 0.589 0.607 0.556 0.591 0.609
nodes 0.617 0.621 0.602 0.618 0.622
surg 0.631 0.620 0.543 0.632 0.621
adhere 0.504 0.528 0.531 0.500 0.531
age_nodes 0.588 0.607 0.607 0.590 0.609
age_surg 0.504 0.541 0.543 0.500 0.543
age_adhere 0.504 0.528 0.531 0.500 0.531
age_extent 0.504 0.528 0.528 0.500 0.528
  IDI NRI z.IDI z.NRI Delta.AUC Frequency
extent 0.01708 0.237 3.74 4.20 0.00981 1.0
node4 0.00769 0.340 2.99 4.99 -0.01925 1.0
age_node4 0.00702 0.314 2.94 4.64 0.01675 1.0
rxLev_5FU_age 0.00924 0.223 2.89 2.99 0.01297 1.0
rxLev_5FU 0.00797 0.223 2.68 2.99 0.01726 1.0
nodes 0.00508 0.195 2.65 2.46 0.00435 1.0
surg 0.00703 0.171 2.53 2.39 -0.01111 1.0
adhere 0.00461 0.125 2.30 2.31 0.03114 0.7
age_nodes 0.00439 0.135 2.28 1.70 0.01881 1.0
age_surg 0.00594 0.171 2.24 2.39 0.04284 0.9
age_adhere 0.00363 0.125 2.07 2.31 0.03114 0.4
age_extent 0.00482 0.111 1.93 1.39 0.02777 0.4

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataColonTrain)
timeinterval <- 2*mean(subset(dataColonTrain,status==1)$time)

h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))

rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Raw Train: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.517 0.328 0.229 0.11697 0.496
RR 1.606 1.991 3.903 1.00000 1.607
SEN 0.272 0.757 0.984 1.00000 0.278
SPE 0.896 0.539 0.104 0.00649 0.893
BACC 0.584 0.648 0.544 0.50325 0.585
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.971 0.866 1.08 0.636
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.44 1.44 1.42 1.47
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.29 1.29 1.29 1.3
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.654 0.654 0.624 0.687
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.688 0.646 0.729
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.272 0.223 0.324
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.86 0.931
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.518
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.61 1.39 1.86
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 53.285360 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 505 228 271.6 6.99 53.3
class=1 116 85 41.4 45.80 53.3

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.681 1.5 3.1

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataColonTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Calibrated Train: Colon",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.664 0.449 0.323 0.16999 0.499
RR 1.606 1.991 3.903 1.00000 1.576
SEN 0.272 0.757 0.984 1.00000 0.502
SPE 0.896 0.539 0.104 0.00649 0.724
BACC 0.584 0.648 0.544 0.50325 0.613
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.794 0.708 0.887 2.57e-05
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
0.998 0.998 0.982 1.01
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.01 1.01 1.01 1.01
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.654 0.654 0.623 0.686
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.688 0.646 0.729
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.272 0.223 0.324
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.86 0.931
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.665
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.61 1.39 1.86
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 53.285360 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 505 228 271.6 6.99 53.3
class=1 116 85 41.4 45.80 53.3

Evaluating on the test set

The calibrated h0 and timeinterval were estimated on the training set

index <- predict(ml,dataColonTest)
rdata <- cbind(dataColonTest$status,ppoisGzero(index,h0))

rrAnalysisTest <- RRPlot(rdata,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=dataColonTest$time,
                     title="Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.66 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.664 0.496 0.393 0.19235 0.499
RR 1.758 2.028 2.480 1.00000 1.994
SEN 0.278 0.609 0.887 1.00000 0.594
SPE 0.918 0.739 0.366 0.00746 0.746
BACC 0.598 0.674 0.626 0.50373 0.670
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.792 0.663 0.939 0.00611
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.04 1.04 1.02 1.06
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.04 1.04 1.03 1.04
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.688 0.688 0.644 0.729
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.72 0.658 0.781
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.271 0.197 0.355
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.918 0.858 0.958
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.665
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.76 1.42 2.18
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 32.110184 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 220 97 117.7 3.64 32.1
class=1 47 36 15.3 28.04 32.1

Cross-Validation

Here we will cross validate the training set and evaluate also on the testing set. The h0 and the timeinterval are the ones estimated on the calibration process

rcv <- randomCV(theData=dataColonTrain,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.75,
                repetitions=50,
                classSamplingType = "Pro",
                testingSet=dataColonTest
         )

.[+++++].[+++++].[+++++++].[+++++].[+++].[+++++].[++++-].[++++-].[++++++++].[++++–]10 Tested: 852 Avg. Selected: 8.5 Min Tests: 1 Max Tests: 10 Mean Tests: 4.964789 . MAD: 0.4756285

.[++++++].[+++-].[++-].[+++].[++++-].[+++-].[++++++].[++–].[+++++++++].[+++++]20 Tested: 888 Avg. Selected: 8.3 Min Tests: 1 Max Tests: 20 Mean Tests: 9.527027 . MAD: 0.4763477

.[++-].[+++].[++–].[++++-].[++++].[++++].[+++++].[++++++++++]..[+++++].[+++–]30 Tested: 888 Avg. Selected: 8.066667 Min Tests: 2 Max Tests: 30 Mean Tests: 14.29054 . MAD: 0.4765761

.[+++-].[++++++].[++++++++].[++-].[++++-].[+++].[++++].[+++].[+++++++-].[+++++]40 Tested: 888 Avg. Selected: 8.35 Min Tests: 3 Max Tests: 40 Mean Tests: 19.05405 . MAD: 0.4764345

.[+-++-].[++–].[+++–].[++-].[+++++–].[++].[++++].[++–].[+++-].[++++++–]50 Tested: 888 Avg. Selected: 8.12 Min Tests: 4 Max Tests: 50 Mean Tests: 23.81757 . MAD: 0.4764308

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="CV Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

CV Test Performance

pander::pander(t(rrAnalysisCVTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.66 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.665 0.478 0.383 0.3103 0.500
RR 1.644 1.693 2.481 45.6655 1.597
SEN 0.184 0.603 0.973 1.0000 0.482
SPE 0.943 0.658 0.102 0.0204 0.747
BACC 0.564 0.631 0.537 0.5102 0.614
pander::pander(t(rrAnalysisCVTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.769 0.699 0.844 7.64e-09
pander::pander(t(rrAnalysisCVTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
0.995 0.995 0.984 1.01
pander::pander(t(rrAnalysisCVTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.02 1.02 1.02 1.03
pander::pander(rrAnalysisCVTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.648 0.648 0.624 0.672
pander::pander(t(rrAnalysisCVTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.669 0.634 0.705
pander::pander((rrAnalysisCVTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.184 0.149 0.223
pander::pander((rrAnalysisCVTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.943 0.918 0.963
pander::pander(t(rrAnalysisCVTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.665
pander::pander(t(rrAnalysisCVTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.64 1.45 1.87
pander::pander(rrAnalysisCVTest$surdif,caption="Logrank test")
Logrank test Chisq = 62.124093 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 781 364 409.5 5.05 62.1
class=1 107 82 36.5 56.65 62.1